-
Notifications
You must be signed in to change notification settings - Fork 12.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Implement FP8 SVE intrinsics for widening conversions #118123
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) ChangesFull diff: https://github.com/llvm/llvm-project/pull/118123.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..b9d8360843aa8e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
}
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+ // 8-bit floating-point convert to BFloat16/Float16
+ def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+ def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+ // 8-bit floating-point convert to BFloat16/Float16 (top)
+ def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+ def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00000000000000..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include <arm_sme.h>
+#else
+#include <arm_sve.h>
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvt1_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvt2_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z20test_svcvt2_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvt2_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt2_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvtlt1_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z22test_svcvtlt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvtlt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt1_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvtlt2_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z22test_svcvtlt2_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvtlt2_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt2_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvt1_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z19test_svcvt1_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt1_f16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvt2_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z19test_svcvt2_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt2_f16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvtlt1_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z21test_svcvtlt1_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtlt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt1_f16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvtlt2_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z21test_svcvtlt2_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt2_f16,_mf8,_fpm)(zn, fpm);
+}
diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
new file mode 100644
index 00000000000000..aafd42f798d935
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
@@ -0,0 +1,24 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm %s
+
+#include <arm_sve.h>
+
+void test_features(svmfloat8_t zn, fpm_t fpm) {
+ svcvt1_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt1_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvt2_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt2_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt1_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt1_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt2_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt2_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvt1_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvt2_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt1_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt2_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a91616b9556828..13bc5e08d2756f 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3864,3 +3864,20 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
// Neon absolute maximum and minimum
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
+
+//
+// FP8 intrinsics
+//
+let TargetPrefix = "aarch64" in {
+
+// Conversions
+class SVE2_FP8_Cvt
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [llvm_nxv16i8_ty],
+ [IntrReadMem, IntrInaccessibleMemOnly]>;
+
+def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt;
+def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt;
+def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt;
+def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index fb0eb7a80c6d72..5365a00f3f42ee 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4369,14 +4369,14 @@ let Predicates = [HasNonStreamingSVE2p2orSME2p2] in {
//===----------------------------------------------------------------------===//
let Predicates = [HasSVE2orSME2, HasFP8] in {
// FP8 upconvert
-defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
-defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
-defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
-defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
-defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
-defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
-defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
-defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
+defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt", nxv8f16, int_aarch64_sve_fp8_cvt1>;
+defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt", nxv8f16, int_aarch64_sve_fp8_cvt2>;
+defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt", nxv8bf16, int_aarch64_sve_fp8_cvt1>;
+defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt", nxv8bf16, int_aarch64_sve_fp8_cvt2>;
+defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt1>;
+defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt2>;
+defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt1>;
+defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>;
// FP8 downconvert
defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index faaaca3f28d758..58770bf6e274d3 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10733,10 +10733,15 @@ class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Uses = [FPMR, FPCR];
+
+ let mayLoad = 1;
+ let mayStore = 0;
}
-multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
+multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic, ValueType vtd, SDPatternOperator op> {
def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
+
+ def : SVE_1_Op_Pat<vtd, op, nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
}
// FP8 downconvert
diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll
new file mode 100644
index 00000000000000..bf0030462db98f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+bf16,+sve2,+fp8 < %s | FileCheck %s
+; RUN: llc -mattr=+bf16,+sme2,+fp8 --force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x bfloat> @cvt1_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt1_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf1cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt1.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @cvt2_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt2_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf2cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt2.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @cvtlt1_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt1_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf1cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @cvtlt2_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt2_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf2cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x half> @cvt1_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt1_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f1cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt1.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @cvt2_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f2cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt2.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
+
+
+define <vscale x 8 x half> @cvtlt1_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt1_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f1cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @cvtlt2_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f2cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
|
@llvm/pr-subscribers-llvm-ir Author: Momchil Velikov (momchil-velikov) ChangesFull diff: https://github.com/llvm/llvm-project/pull/118123.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..b9d8360843aa8e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
}
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+ // 8-bit floating-point convert to BFloat16/Float16
+ def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+ def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+ // 8-bit floating-point convert to BFloat16/Float16 (top)
+ def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+ def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00000000000000..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include <arm_sme.h>
+#else
+#include <arm_sve.h>
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvt1_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvt2_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z20test_svcvt2_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvt2_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt2_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvtlt1_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z22test_svcvtlt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvtlt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt1_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvtlt2_bf16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x bfloat> @_Z22test_svcvtlt2_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svcvtlt2_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt2_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvt1_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z19test_svcvt1_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt1_f16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvt2_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z19test_svcvt2_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvt2_f16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvtlt1_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z21test_svcvtlt1_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtlt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt1_f16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svcvtlt2_f16_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z21test_svcvtlt2_f16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8> [[ZN]])
+// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+ return SVE_ACLE_FUNC(svcvtlt2_f16,_mf8,_fpm)(zn, fpm);
+}
diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
new file mode 100644
index 00000000000000..aafd42f798d935
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
@@ -0,0 +1,24 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm %s
+
+#include <arm_sve.h>
+
+void test_features(svmfloat8_t zn, fpm_t fpm) {
+ svcvt1_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt1_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvt2_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt2_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt1_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt1_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt2_bf16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt2_bf16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvt1_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvt2_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt1_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+ svcvtlt2_f16_mf8_fpm(zn, fpm);
+ // expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a91616b9556828..13bc5e08d2756f 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3864,3 +3864,20 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
// Neon absolute maximum and minimum
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
+
+//
+// FP8 intrinsics
+//
+let TargetPrefix = "aarch64" in {
+
+// Conversions
+class SVE2_FP8_Cvt
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [llvm_nxv16i8_ty],
+ [IntrReadMem, IntrInaccessibleMemOnly]>;
+
+def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt;
+def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt;
+def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt;
+def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index fb0eb7a80c6d72..5365a00f3f42ee 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4369,14 +4369,14 @@ let Predicates = [HasNonStreamingSVE2p2orSME2p2] in {
//===----------------------------------------------------------------------===//
let Predicates = [HasSVE2orSME2, HasFP8] in {
// FP8 upconvert
-defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
-defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
-defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
-defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
-defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
-defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
-defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
-defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
+defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt", nxv8f16, int_aarch64_sve_fp8_cvt1>;
+defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt", nxv8f16, int_aarch64_sve_fp8_cvt2>;
+defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt", nxv8bf16, int_aarch64_sve_fp8_cvt1>;
+defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt", nxv8bf16, int_aarch64_sve_fp8_cvt2>;
+defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt1>;
+defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt2>;
+defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt1>;
+defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>;
// FP8 downconvert
defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index faaaca3f28d758..58770bf6e274d3 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10733,10 +10733,15 @@ class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Uses = [FPMR, FPCR];
+
+ let mayLoad = 1;
+ let mayStore = 0;
}
-multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
+multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic, ValueType vtd, SDPatternOperator op> {
def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
+
+ def : SVE_1_Op_Pat<vtd, op, nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
}
// FP8 downconvert
diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll
new file mode 100644
index 00000000000000..bf0030462db98f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+bf16,+sve2,+fp8 < %s | FileCheck %s
+; RUN: llc -mattr=+bf16,+sme2,+fp8 --force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x bfloat> @cvt1_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt1_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf1cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt1.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @cvt2_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt2_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf2cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvt2.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @cvtlt1_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt1_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf1cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt1.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @cvtlt2_bf16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt2_bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bf2cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fp8.cvtlt2.nxv8bf16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x half> @cvt1_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt1_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f1cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt1.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @cvt2_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvt2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f2cvt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvt2.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
+
+
+define <vscale x 8 x half> @cvtlt1_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt1_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f1cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt1.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @cvtlt2_f16(<vscale x 16 x i8> %s) {
+; CHECK-LABEL: cvtlt2_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: f2cvtlt z0.h, z0.b
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8> %s)
+ ret <vscale x 8 x half> %r
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Momchil,
Thank you for the patch. It LGTM!
I just would like before you push the patch to update the commit message with the prototypes you are implementing in this patch. So we can check with CVT it has implemented without having to look into the patch.
3a9643e
to
cdd8658
Compare
This patch adds the following intrinsics: * 8-bit floating-point convert to half-precision and BFloat16. // Variants are also available for: _bf16 svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); * 8-bit floating-point convert to half-precision and BFloat16 (top). // Variants are also available for: _bf16 svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
cdd8658
to
359e611
Compare
…#118123) This patch adds the following intrinsics: * 8-bit floating-point convert to half-precision and BFloat16. // Variants are also available for: _bf16 svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); * 8-bit floating-point convert to half-precision and BFloat16 (top). // Variants are also available for: _bf16 svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
This patch adds the following intrinsics:
8-bit floating-point convert to half-precision and BFloat16.
// Variants are also available for: _bf16
svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
8-bit floating-point convert to half-precision and BFloat16 (top).
// Variants are also available for: _bf16
svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);